"""Scrape job postings from BOSS直聘 (zhipin.com) list pages into MySQL."""
import re

import requests
from pyquery import PyQuery as pq

from sql_helper import MySqlHelper

USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36')
headers = {
    'User-Agent': USER_AGENT,
}
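
# ``sql_helper`` is a project-local module that is not included here. A
# minimal sketch of the interface this script relies on, assuming pymysql
# (the class layout and connection parameters below are assumptions, not
# the real module):
#
#     import pymysql
#
#     class MySqlHelper:
#         def __init__(self):
#             self.conn = pymysql.connect(host='localhost', user='root',
#                                         password='...', db='jobs',
#                                         charset='utf8mb4')
#
#         def exec_many(self, sql, params):
#             # Run one parameterized statement per row in ``params``.
#             with self.conn.cursor() as cursor:
#                 cursor.executemany(sql, params)
#             self.conn.commit()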


def read_jd_list(url, rows=None):
    """Fetch one list page and append one row per job posting to ``rows``."""
    if rows is None:
        rows = []
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        html = pq(response.text)
        # One CSS selector per field of each job card on the list page.
        titles = html('#main > div > div.job-list > ul > li > div > div.info-primary > h3 > a > div.job-title').items()
        rmbs = html('#main > div > div.job-list > ul > li > div > div.info-primary > h3 > a > span').items()
        gongsi = html('#main > div > div.job-list > ul > li > div > div.info-company > div > h3 > a').items()
        diqu = html('#main > div > div.job-list > ul > li > div > div.info-primary > p').items()
        times = html('#main > div > div.job-list > ul > li > div > div.info-publis > p').items()
        users = html('#main > div > div.job-list > ul > li > div > div.info-publis > h3').items()
        contents = html('#main > div > div.job-list > ul > li > div > div.info-primary > h3 > a').items()
        # The info-primary <p> packs location, experience, and education into
        # one element separated by <em> tags; split them apart with regexes
        # over the raw HTML. Compile the patterns once, outside the loop.
        re_diqu = re.compile('<p>(.*?)<em', re.S)
        re_jingyan = re.compile('/>(.*?)<em', re.S)
        re_xueli = re.compile('<em.*?/>(.*?)</p>', re.S)
        for title, rmb, gs, dq, pub_time, user, content_url in zip(titles, rmbs, gongsi, diqu, times, users, contents):
            diq = re_diqu.findall(str(dq))
            jingyan = re_jingyan.findall(str(dq))
            xueli = re_xueli.findall(str(dq))
            rows.append([
                title.text(),                           # job title
                rmb.text(),                             # salary
                gs.text(),                              # company
                diq[0].strip() if diq else '',          # location
                jingyan[0].strip() if jingyan else '',  # experience required
                xueli[0][-2:] if xueli else '',         # education (last two chars, e.g. 本科)
                pub_time.text(),                        # publish time
                user.text(),                            # recruiter
                'https://www.zhipin.com' + content_url.attr('href'),  # detail page URL
            ])
    else:
        print("Request failed with status %d for %s" % (response.status_code, url))
    return rows


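# Because ``read_jd_list`` threads the ``rows`` accumulator through as a
# parameter, several pages could also be collected into one list and written
# with a single bulk insert, e.g. (a sketch, not what the block below does):
#
#     rows = None
#     for page in range(1, 11):
#         rows = read_jd_list(
#             "https://www.zhipin.com/c100010000-p100109/?page=%d" % page, rows)
#     # ...then write ``rows`` once with a single exec_many() call.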
if __name__ == "__main__":
    mysql = MySqlHelper()  # reuse one connection for all pages
    sql = ("INSERT INTO boos(title, rmb, gs, diqu, gongling, xueli, time, user, url) "
           "VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)")
    for i in range(1, 11):  # list pages 1-10
        url = "https://www.zhipin.com/c100010000-p100109/?page=" + str(i)
        rows = read_jd_list(url)
        mysql.exec_many(sql, rows)
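
# The target table is assumed to look roughly like the sketch below; the
# column names come from the INSERT statement above, but the types are
# guesses and the real schema may differ:
#
#     CREATE TABLE boos (
#         title    VARCHAR(255),  -- job title
#         rmb      VARCHAR(64),   -- salary range, e.g. "15k-25k"
#         gs       VARCHAR(255),  -- company name
#         diqu     VARCHAR(128),  -- location
#         gongling VARCHAR(64),   -- experience required
#         xueli    VARCHAR(32),   -- education level
#         time     VARCHAR(64),   -- publish time
#         user     VARCHAR(128),  -- recruiter
#         url      VARCHAR(512)   -- detail page URL
#     );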
